<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: MultiTerm.php 18954 2009-11-12 20:01:33Z alexander $
 */


/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
{

	/**
	 * Terms to find.
	 * Array of Zend_Search_Lucene_Index_Term
	 *
	 * @var array
	 */
	private $_terms = array();

	/**
	 * Term signs, keyed by the same ids as $_terms.
	 * If true then term is required.
	 * If false then term is prohibited.
	 * If null then term is neither prohibited, nor required
	 *
	 * If the whole property is null then all terms are required
	 * (the query is a pure conjunction).
	 *
	 * @var array|null
	 */
	private $_signs;

	/**
	 * Result vector (document ids are stored as array keys).
	 * Null until the query has been executed.
	 *
	 * @var array|null
	 */
	private $_resVector = null;

	/**
	 * Terms frequencies vectors.
	 * Array of Arrays:
	 * term1Id => (docId => freq, ...)
	 * term2Id => (docId => freq, ...)
	 *
	 * @var array
	 */
	private $_termsFreqs = array();


	/**
	 * A score factor based on the fraction of all query terms
	 * that a document contains.
	 * float for conjunction queries
	 * array of float (indexed by number of matched terms) for non conjunction queries
	 *
	 * Calculated lazily on the first scoring call.
	 *
	 * @var mixed
	 */
	private $_coord = null;


	/**
	 * Terms weights
	 * array of Zend_Search_Lucene_Search_Weight, keyed by term id
	 *
	 * @var array
	 */
	private $_weights = array();


	/**
	 * Class constructor.  Create a new multi-term query object.
	 *
	 * if $signs array is omitted then all terms are required
	 * it differs from addTerm() behavior, but should never be used
	 *
	 * Nothing is initialized if $terms is not an array.
	 *
	 * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
	 * @param array $signs    Array of signs.  Sign is boolean|null.
	 * @throws Zend_Search_Lucene_Exception  if the number of terms exceeds the terms per query limit
	 */
	public function __construct($terms = null, $signs = null)
	{
		if (!is_array($terms)) {
			return;
		}

		require_once 'Zend/Search/Lucene.php';
		$limit = Zend_Search_Lucene::getTermsPerQueryLimit();
		// A limit of 0 is documented by Zend_Search_Lucene as "no limit",
		// so it must not reject every non-empty term list.
		if ($limit != 0  &&  count($terms) > $limit) {
			throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
		}

		$this->_terms = $terms;
		$this->_signs = null;

		// Signs are stored only if at least one term is not required;
		// otherwise the query stays in the compact "all required" form.
		if (is_array($signs)) {
			foreach ($signs as $sign) {
				if ($sign !== true) {
					$this->_signs = $signs;
					break;
				}
			}
		}
	}


	/**
	 * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
	 *
	 * The sign is specified as:
	 *     TRUE  - term is required
	 *     FALSE - term is prohibited
	 *     NULL  - term is neither prohibited, nor required
	 *
	 * @param  Zend_Search_Lucene_Index_Term $term
	 * @param  boolean|null $sign
	 * @return void
	 */
	public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null)
	{
		// Append the term first and read back the key it actually received.
		// Term keys may be sparse (optimize() removes entries without
		// re-indexing), so signs must be keyed explicitly to stay aligned.
		$this->_terms[] = $term;
		end($this->_terms);
		$termId = key($this->_terms);

		if ($sign === true  &&  $this->_signs === null) {
			// All terms (including the new one) are required: nothing to record
			return;
		}

		if ($this->_signs === null) {
			// All previously added terms were required; materialize their signs
			$this->_signs = array();
			foreach ($this->_terms as $id => $prevTerm) {
				if ($id !== $termId) {
					$this->_signs[$id] = true;
				}
			}
		}

		$this->_signs[$termId] = $sign;
	}


	/**
	 * Re-write query into primitive queries in the context of specified index
	 *
	 * Returns an Empty query if there are no terms, the query itself if every
	 * term has a field, and otherwise a Boolean query built from rewritten
	 * single-term subqueries (with the same signs and boost).
	 *
	 * @param Zend_Search_Lucene_Interface $index
	 * @return Zend_Search_Lucene_Search_Query
	 */
	public function rewrite(Zend_Search_Lucene_Interface $index)
	{
		if (count($this->_terms) == 0) {
			require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
			return new Zend_Search_Lucene_Search_Query_Empty();
		}

		// Check, that all fields are qualified
		$allQualified = true;
		foreach ($this->_terms as $term) {
			if ($term->field === null) {
				$allQualified = false;
				break;
			}
		}

		if ($allQualified) {
			return $this;
		}

		/** transform multiterm query to boolean and apply rewrite() method to subqueries. */
		require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
		$query = new Zend_Search_Lucene_Search_Query_Boolean();
		$query->setBoost($this->getBoost());

		require_once 'Zend/Search/Lucene/Search/Query/Term.php';
		foreach ($this->_terms as $termId => $term) {
			$subquery = new Zend_Search_Lucene_Search_Query_Term($term);
			$sign     = ($this->_signs === null) ? true : $this->_signs[$termId];

			$query->addSubquery($subquery->rewrite($index), $sign);
		}

		return $query;
	}

	/**
	 * Optimize query in the context of specified index
	 *
	 * Terms missing from the index are dropped when optional or prohibited;
	 * a missing required term makes the whole query Empty. The result is an
	 * Empty query, a single Term query, or a new MultiTerm query.
	 *
	 * @param Zend_Search_Lucene_Interface $index
	 * @return Zend_Search_Lucene_Search_Query
	 */
	public function optimize(Zend_Search_Lucene_Interface $index)
	{
		// Work on copies, this query object itself is left untouched
		$terms = $this->_terms;
		$signs = $this->_signs;

		foreach ($terms as $id => $term) {
			if ($index->hasTerm($term)) {
				continue;
			}

			if ($signs === null  ||  $signs[$id] === true) {
				// Term is required, but is absent in the index
				require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
				return new Zend_Search_Lucene_Search_Query_Empty();
			}

			// Term is optional or prohibited
			// Remove it from terms and signs list (keys are intentionally not re-indexed)
			unset($terms[$id]);
			unset($signs[$id]);
		}

		// Check if all presented terms are prohibited
		// (an empty signs array also counts as "all prohibited")
		$allProhibited = ($signs !== null);
		if ($signs !== null) {
			foreach ($signs as $sign) {
				if ($sign !== false) {
					$allProhibited = false;
					break;
				}
			}
		}
		if ($allProhibited) {
			require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
			return new Zend_Search_Lucene_Search_Query_Empty();
		}

		/**
		 * @todo make an optimization for repeated terms
		 * (they may have different signs)
		 */

		$termsCount = count($terms);

		if ($termsCount == 1) {
			// It's already checked, that it's not a prohibited term

			// It's one term query with one required or optional element
			require_once 'Zend/Search/Lucene/Search/Query/Term.php';
			$optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
			$optimizedQuery->setBoost($this->getBoost());

			return $optimizedQuery;
		}

		if ($termsCount == 0) {
			require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
			return new Zend_Search_Lucene_Search_Query_Empty();
		}

		$optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
		$optimizedQuery->setBoost($this->getBoost());
		return $optimizedQuery;
	}


	/**
	 * Returns query terms
	 *
	 * @return array  Array of Zend_Search_Lucene_Index_Term objects
	 */
	public function getTerms()
	{
		return $this->_terms;
	}


	/**
	 * Return terms signs
	 *
	 * @return array|null  Null means that all terms are required
	 */
	public function getSigns()
	{
		return $this->_signs;
	}


	/**
	 * Set weight for specified term
	 *
	 * @param integer $num  Term id (key in the terms array)
	 * @param Zend_Search_Lucene_Search_Weight_Term $weight
	 */
	public function setWeight($num, $weight)
	{
		$this->_weights[$num] = $weight;
	}


	/**
	 * Constructs an appropriate Weight implementation for this query.
	 *
	 * The created weight is also stored in $this->_weight.
	 *
	 * @param Zend_Search_Lucene_Interface $reader
	 * @return Zend_Search_Lucene_Search_Weight
	 */
	public function createWeight(Zend_Search_Lucene_Interface $reader)
	{
		require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
		$this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
		return $this->_weight;
	}


	/**
	 * Calculate result vector for Conjunction query
	 * (like '+something +another')
	 *
	 * Side effect: $_terms is re-ordered by ascending document frequency
	 * (array_multisort() also re-indexes its numeric keys).
	 *
	 * @param Zend_Search_Lucene_Interface $reader
	 */
	private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
	{
		$this->_resVector = null;

		if (count($this->_terms) == 0) {
			// Nothing to match. Return right away: the code below expects
			// at least one term to produce a docs vector.
			$this->_resVector = array();
			return;
		}

		// Order terms by selectivity
		$docFreqs = array();
		$ids      = array();
		foreach ($this->_terms as $id => $term) {
			$docFreqs[] = $reader->docFreq($term);
			$ids[]      = $id; // Used to keep original order for terms with the same selectivity and omit terms comparison
		}
		array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
		                $ids,      SORT_ASC, SORT_NUMERIC,
		                $this->_terms);

		require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
		$docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
		$termDocs   = array();
		foreach ($this->_terms as $termId => $term) {
			// The same filter object is passed to every call; only the
			// vector returned for the last term is kept.
			$termDocs = $reader->termDocs($term, $docsFilter);
		}
		// Treat last retrieved docs vector as a result set
		// (filter collects data for other terms)
		$this->_resVector = array_flip($termDocs);

		foreach ($this->_terms as $termId => $term) {
			$this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
		}

		// ksort($this->_resVector, SORT_NUMERIC);
		// Docs are returned ordered. Used algorithms doesn't change elements order.
	}


	/**
	 * Calculate result vector for non Conjunction query
	 * (like '+something -another')
	 *
	 * Result is the intersection of all required terms docs (or the union of
	 * optional terms docs if there are no required terms) minus the union of
	 * prohibited terms docs.
	 *
	 * @param Zend_Search_Lucene_Interface $reader
	 */
	private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
	{
		$requiredVectors      = array();
		$requiredVectorsSizes = array();
		$requiredVectorsIds   = array(); // is used to prevent arrays comparison

		$optional   = array();
		$prohibited = array();

		foreach ($this->_terms as $termId => $term) {
			// Doc ids become keys, so membership checks are done with isset()
			$termDocs = array_flip($reader->termDocs($term));
			$sign     = $this->_signs[$termId];

			if ($sign === true) {
				// required
				$requiredVectors[]      = $termDocs;
				$requiredVectorsSizes[] = count($termDocs);
				$requiredVectorsIds[]   = $termId;
			} elseif ($sign === false) {
				// prohibited
				// array union
				$prohibited += $termDocs;
			} else {
				// neither required, nor prohibited
				// array union
				$optional += $termDocs;
			}

			$this->_termsFreqs[$termId] = $reader->termFreqs($term);
		}

		// sort resvectors in order of subquery cardinality increasing
		array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
		                $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
		                $requiredVectors);

		$required = null;
		foreach ($requiredVectors as $nextResVector) {
			if ($required === null) {
				$required = $nextResVector;
			} else {
				//$required = array_intersect_key($required, $nextResVector);

				/**
				 * This code is used as workaround for array_intersect_key() slowness problem.
				 */
				$updatedVector = array();
				foreach ($required as $id => $value) {
					if (isset($nextResVector[$id])) {
						$updatedVector[$id] = $value;
					}
				}
				$required = $updatedVector;
			}

			if (count($required) == 0) {
				// Empty result set, we don't need to check other terms
				break;
			}
		}

		// Optional terms define the result set only if nothing is required
		$this->_resVector = ($required !== null) ? $required : $optional;

		if (count($prohibited) != 0) {
			// $this->_resVector = array_diff_key($this->_resVector, $prohibited);

			/**
			 * This code is used as workaround for array_diff_key() slowness problem.
			 * The shorter of the two arrays is the one iterated over.
			 */
			$updatedVector = $this->_resVector;
			if (count($this->_resVector) < count($prohibited)) {
				foreach ($this->_resVector as $id => $value) {
					if (isset($prohibited[$id])) {
						unset($updatedVector[$id]);
					}
				}
			} else {
				foreach ($prohibited as $id => $value) {
					unset($updatedVector[$id]);
				}
			}
			$this->_resVector = $updatedVector;
		}

		ksort($this->_resVector, SORT_NUMERIC);
	}


	/**
	 * Score calculator for conjunction queries (all terms are required)
	 *
	 * Expects that the query has been executed and term weights have been set.
	 *
	 * @param integer $docId
	 * @param Zend_Search_Lucene_Interface $reader
	 * @return float
	 */
	public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
	{
		$similarity = $reader->getSimilarity();

		if ($this->_coord === null) {
			// Every term matches in a conjunction, so overlap equals the terms count
			$termsCount   = count($this->_terms);
			$this->_coord = $similarity->coord($termsCount, $termsCount);
		}

		$score = 0.0;

		foreach ($this->_terms as $termId => $term) {
			/**
			 * We don't need to check that term freq is not 0
			 * Score calculation is performed only for matched docs
			 */
			$score += $similarity->tf($this->_termsFreqs[$termId][$docId]) *
			          $this->_weights[$termId]->getValue() *
			          $reader->norm($docId, $term->field);
		}

		return $score * $this->_coord * $this->getBoost();
	}


	/**
	 * Score calculator for non conjunction queries (not all terms are required)
	 *
	 * Only non-prohibited terms which have a frequency entry for the document
	 * contribute to the score.
	 *
	 * @param integer $docId
	 * @param Zend_Search_Lucene_Interface $reader
	 * @return float
	 */
	public function _nonConjunctionScore($docId, $reader)
	{
		$similarity = $reader->getSimilarity();

		if ($this->_coord === null) {
			$this->_coord = array();

			// Maximum overlap is the number of non-prohibited terms
			$maxCoord = 0;
			foreach ($this->_signs as $sign) {
				if ($sign !== false /* not prohibited */) {
					$maxCoord++;
				}
			}

			// Pre-calculate coord factor for each possible number of matched terms
			for ($count = 0; $count <= $maxCoord; $count++) {
				$this->_coord[$count] = $similarity->coord($count, $maxCoord);
			}
		}

		$score = 0.0;
		$matchedTerms = 0;
		foreach ($this->_terms as $termId => $term) {
			// Skip prohibited terms and terms which are not matched in this document
			if ($this->_signs[$termId] === false  ||
			    !isset($this->_termsFreqs[$termId][$docId])) {
				continue;
			}

			$matchedTerms++;

			/**
			 * We don't need to check that term freq is not 0
			 * Score calculation is performed only for matched docs
			 */
			$score += $similarity->tf($this->_termsFreqs[$termId][$docId]) *
			          $this->_weights[$termId]->getValue() *
			          $reader->norm($docId, $term->field);
		}

		return $score * $this->_coord[$matchedTerms] * $this->getBoost();
	}

	/**
	 * Execute query in context of index reader
	 * It also initializes necessary internal structures
	 *
	 * NOTE: $docsFilter is accepted for signature compatibility, but is not
	 * used by this implementation.
	 *
	 * @param Zend_Search_Lucene_Interface $reader
	 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
	 */
	public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
	{
		if ($this->_signs === null) {
			$this->_calculateConjunctionResult($reader);
		} else {
			$this->_calculateNonConjunctionResult($reader);
		}

		// Initialize weight if it's not done yet
		$this->_initWeight($reader);
	}

	/**
	 * Get document ids likely matching the query
	 *
	 * It's an array with document ids as keys (performance considerations)
	 *
	 * @return array|null  Null if the query has not been executed yet
	 */
	public function matchedDocs()
	{
		return $this->_resVector;
	}

	/**
	 * Score specified document
	 *
	 * @param integer $docId
	 * @param Zend_Search_Lucene_Interface $reader
	 * @return float  0 for documents which are not in the result vector
	 */
	public function score($docId, Zend_Search_Lucene_Interface $reader)
	{
		if (!isset($this->_resVector[$docId])) {
			return 0;
		}

		if ($this->_signs === null) {
			return $this->_conjunctionScore($docId, $reader);
		}

		return $this->_nonConjunctionScore($docId, $reader);
	}

	/**
	 * Return query terms
	 *
	 * Prohibited terms are excluded.
	 *
	 * @return array
	 */
	public function getQueryTerms()
	{
		if ($this->_signs === null) {
			return $this->_terms;
		}

		$terms = array();

		foreach ($this->_signs as $id => $sign) {
			if ($sign !== false) {
				$terms[] = $this->_terms[$id];
			}
		}

		return $terms;
	}

	/**
	 * Query specific matches highlighting
	 *
	 * Passes texts of all non-prohibited terms to the highlighter.
	 *
	 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
	 */
	protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
	{
		$words = array();

		if ($this->_signs === null) {
			foreach ($this->_terms as $term) {
				$words[] = $term->text;
			}
		} else {
			foreach ($this->_signs as $id => $sign) {
				if ($sign !== false) {
					$words[] = $this->_terms[$id]->text;
				}
			}
		}

		$highlighter->highlight($words);
	}

	/**
	 * Print a query
	 *
	 * @return string
	 */
	public function __toString()
	{
		// It's used only for query visualisation, so we don't care about characters escaping

		// Term representations are collected and joined afterwards, so the
		// separator doesn't depend on the first term having key 0.
		$parts = array();

		foreach ($this->_terms as $id => $term) {
			$part = '';

			if ($this->_signs === null || $this->_signs[$id] === true) {
				$part .= '+';
			} else if ($this->_signs[$id] === false) {
				$part .= '-';
			}

			if ($term->field !== null) {
				$part .= $term->field . ':';
			}
			$part .= $term->text;

			$parts[] = $part;
		}

		$query = implode(' ', $parts);

		if ($this->getBoost() != 1) {
			$query = '(' . $query . ')^' . round($this->getBoost(), 4);
		}

		return $query;
	}
}

